{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取原商品"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>Adventure|Children</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>Action</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12</td>\n",
       "      <td>Comedy|Horror</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13</td>\n",
       "      <td>Adventure|Animation|Children</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id                        genres\n",
       "0         4          Comedy|Drama|Romance\n",
       "1         8            Adventure|Children\n",
       "2         9                        Action\n",
       "3        12                 Comedy|Horror\n",
       "4        13  Adventure|Animation|Children"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_movie_old = pd.read_csv('./data/movie_old.csv',usecols=[0,2])\n",
    "df_movie_old.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 统计原物品中所有的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Western',\n",
       " 'Action',\n",
       " 'Fantasy',\n",
       " 'Sci-Fi',\n",
       " 'IMAX',\n",
       " 'War',\n",
       " 'Film-Noir',\n",
       " 'Children',\n",
       " 'Animation',\n",
       " 'Documentary',\n",
       " 'Thriller',\n",
       " 'Crime',\n",
       " 'Romance',\n",
       " 'Comedy',\n",
       " 'Horror',\n",
       " 'Mystery',\n",
       " 'Musical',\n",
       " 'Drama',\n",
       " 'Adventure']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_genres = set()\n",
    "for genres in df_movie_old['genres']:\n",
    "    total_genres |= set(genres.split('|'))\n",
    "total_genres = list(total_genres)\n",
    "total_genres"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取新物品"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>movie_id</th>\n",
       "      <th>genres</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>14</td>\n",
       "      <td>Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18</td>\n",
       "      <td>Comedy</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>30</td>\n",
       "      <td>Crime|Drama</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>35</td>\n",
       "      <td>Drama|Romance</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>41</td>\n",
       "      <td>Drama|War</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   movie_id         genres\n",
       "0        14          Drama\n",
       "1        18         Comedy\n",
       "2        30    Crime|Drama\n",
       "3        35  Drama|Romance\n",
       "4        41      Drama|War"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_movie_new = pd.read_csv('./data/movie_new.csv',usecols=[0,2])\n",
    "df_movie_new.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 建立原物品和新物品的特征矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 原物品id-->index\n",
    "movie_old_id_to_index_dict = {}\n",
    "# 新物品id-->index\n",
    "movie_new_id_to_index_dict = {}\n",
    "\n",
    "# 初始化原物品和新物品的特征矩阵\n",
    "movie_old_genres_array = np.zeros(\n",
    "    shape=(len(df_movie_old),len(total_genres))\n",
    ")\n",
    "movie_new_genres_array = np.zeros(\n",
    "    shape=(len(df_movie_new),len(total_genres))\n",
    ")\n",
    "\n",
    "for index in range(len(df_movie_old)):\n",
    "    movie_id = df_movie_old.iloc[index]['movie_id']\n",
    "    movie_old_id_to_index_dict[movie_id] = index\n",
    "    genres = df_movie_old.iloc[index]['genres'].split('|')\n",
    "    # 创建特征行向量\n",
    "    line_data = np.zeros(shape=len(total_genres))\n",
    "    for i in range(len(total_genres)):\n",
    "        if total_genres[i] in genres:\n",
    "            line_data[i] = 1\n",
    "    # 赋值给index行\n",
    "    movie_old_genres_array[index] = line_data\n",
    "\n",
    "for index in range(len(df_movie_new)):\n",
    "    movie_id = df_movie_new.iloc[index]['movie_id']\n",
    "    movie_new_id_to_index_dict[movie_id] = index\n",
    "    genres = df_movie_new.iloc[index]['genres'].split('|')\n",
    "    # 创建特征行向量\n",
    "    line_data = np.zeros(shape=len(total_genres))\n",
    "    for i in range(len(total_genres)):\n",
    "        if total_genres[i] in genres:\n",
    "            line_data[i] = 1\n",
    "    # 赋值给index行\n",
    "    movie_new_genres_array[index] = line_data\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 原物品到新物品的相似度矩阵"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "movie_old_genres_column_sum_array = np.sum(movie_old_genres_array,axis=1)\n",
    "movie_new_genres_column_sum_array = np.sum(movie_new_genres_array,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 初始化相似度矩阵\n",
    "movie_sim_array = np.zeros(\n",
    "    shape=(len(df_movie_old),len(df_movie_new))\n",
    ")\n",
    "for index in range(len(df_movie_old)):\n",
    "    v1 = np.dot(\n",
    "        movie_old_genres_array[index],movie_new_genres_array.T\n",
    "    )\n",
    "    v2 = np.around(np.sqrt(\n",
    "        movie_old_genres_column_sum_array[index] * movie_new_genres_column_sum_array),3)\n",
    "    movie_sim_array[index] = np.around(v1 / v2,2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.58, 0.58, 0.41, ..., 0.41, 0.58, 0.  ],\n",
       "       [0.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],\n",
       "       [0.  , 0.  , 0.  , ..., 0.71, 0.  , 0.  ],\n",
       "       ...,\n",
       "       [0.71, 0.  , 0.5 , ..., 0.5 , 0.71, 0.  ],\n",
       "       [0.  , 0.  , 0.  , ..., 0.41, 0.  , 0.58],\n",
       "       [0.71, 0.  , 0.5 , ..., 0.5 , 0.71, 0.  ]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie_sim_array"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 导入原物品的打分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1009</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1243</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1848</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>1920</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>2118</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_id  rating\n",
       "0        1      1009     3.5\n",
       "1        1      1243     3.0\n",
       "2        1      1848     3.5\n",
       "3        1      1920     3.5\n",
       "4        1      2118     4.0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating_old = pd.read_csv('./data/rating_old.csv')\n",
    "df_rating_old.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 变更id为index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>319</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>370</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>518</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>538</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>623</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_id  rating\n",
       "0        1       319     3.5\n",
       "1        1       370     3.0\n",
       "2        1       518     3.5\n",
       "3        1       538     3.5\n",
       "4        1       623     4.0"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating_old['movie_id'] = df_rating_old['movie_id'].apply(lambda movie_id : movie_old_id_to_index_dict[movie_id])\n",
    "df_rating_old.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_rating_old.columns = ['user_id','movie_index','rating']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 根据用户喜欢的原物品，生成新物品的推荐"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0..100..200..300..400..500..600..700..800..900..1000..1100..1200..1300..1400..1500..1600..1700..1800..1900..2000..2100..2200..2300..2400..2500..2600..2700..2800..2900..3000..3100..3200..3300..3400..3500..3600..3700..3800..3900..4000..4100..4200..4300..4400..4500..4600..4700..4800..4900..5000..5100..5200..5300..5400..5500..5600..5700..5800..5900..6000..6100..6200..6300..6400..6500..6600..6700..6800..6900..7000..7100..7200..7300..7400..7500..7600..7700..7800..7900..8000..8100..8200..8300..8400..8500..8600..8700..8800..8900..9000..9100..9200..9300..9400..9500..9600..9700..9800..9900..10000..10100..10200..10300..10400..10500..10600..10700..10800..10900..11000..11100..11200..11300..11400..11500..11600..11700..11800..11900..12000..12100..12200..12300..12400..12500..12600..12700..12800..12900..13000..13100..13200..13300..13400..13500..13600..13700..13800..13900..14000..14100..14200..14300..14400..14500..14600..14700..14800..14900..15000..15100..15200..15300..15400..15500..15600..15700..15800..15900..16000..16100..16200..16300..16400..16500..16600..16700..16800..16900..17000..17100..17200..17300..17400..17500..17600..17700..17800..17900..18000..18100..18200..18300..18400..18500..18600..18700..18800..18900..19000..19100..19200..19300..19400..19500..19600..19700..19800..19900..20000..20100..20200..20300..20400..20500..20600..20700..20800..20900..21000..21100..21200..21300..21400..21500..21600..21700..21800..21900..22000..22100..22200..22300..22400..22500..22600..22700..22800..22900..23000..23100..23200..23300..23400..23500..23600..23700..23800..23900..24000..24100..24200..24300..24400..24500..24600..24700..24800..24900..25000..25100..25200..25300..25400..25500..25600..25700..25800..25900..26000..26100..26200..26300..26400..26500..26600..26700..26800..26900..27000..27100..27200..27300..27400..27500..27600..27700..27800..27900..28000..28100..28200..28300..28400..28500..28600..28700..28800..28900..29000..29100..29200..29300..29400..29500..29600..29700..29800..29900..30000..30100..30200..30300..30400..30500..30600.."
     ]
    }
   ],
   "source": [
    "user_recommend = {}\n",
    "\n",
    "for index,(user_id,groupby_userid) in enumerate(df_rating_old.groupby('user_id')):\n",
    "    movies_rating = groupby_userid.groupby('movie_index')['rating'].mean().sort_values(ascending=False)\n",
    "    user_fav = movies_rating[\n",
    "        movies_rating >= 4\n",
    "    ].index.tolist()\n",
    "    user_recommend[user_id] = set(np.where(\n",
    "        movie_sim_array[user_fav] >= 0.85)[1].tolist()[:100])\n",
    "    if index % 100 == 0:print(index,end='..')\n",
    "        "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读物用户对新物品的实际打分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_id</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1217</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1348</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>1350</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>2138</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>2143</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_id  rating\n",
       "0        1      1217     3.5\n",
       "1        1      1348     3.5\n",
       "2        1      1350     3.5\n",
       "3        1      2138     4.0\n",
       "4        1      2143     4.0"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating_new = pd.read_csv('./data/rating_new.csv')\n",
    "df_rating_new.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### id变index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>movie_index</th>\n",
       "      <th>rating</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>148</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>168</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>169</td>\n",
       "      <td>3.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>259</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>261</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user_id  movie_index  rating\n",
       "0        1          148     3.5\n",
       "1        1          168     3.5\n",
       "2        1          169     3.5\n",
       "3        1          259     4.0\n",
       "4        1          261     4.0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating_new['movie_id'] = df_rating_new['movie_id'].apply(lambda movie_id : movie_new_id_to_index_dict[movie_id])\n",
    "df_rating_new.columns = ['user_id','movie_index','rating']\n",
    "df_rating_new.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 得到用户真正喜欢的新物品"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0..100..200..300..400..500..600..700..800..900..1000..1100..1200..1300..1400..1500..1600..1700..1800..1900..2000..2100..2200..2300..2400..2500..2600..2700..2800..2900..3000..3100..3200..3300..3400..3500..3600..3700..3800..3900..4000..4100..4200..4300..4400..4500..4600..4700..4800..4900..5000..5100..5200..5300..5400..5500..5600..5700..5800..5900..6000..6100..6200..6300..6400..6500..6600..6700..6800..6900..7000..7100..7200..7300..7400..7500..7600..7700..7800..7900..8000..8100..8200..8300..8400..8500..8600..8700..8800..8900..9000..9100..9200..9300..9400..9500..9600..9700..9800..9900..10000..10100..10200..10300..10400..10500..10600..10700..10800..10900..11000..11100..11200..11300..11400..11500..11600..11700..11800..11900..12000..12100..12200..12300..12400..12500..12600..12700..12800..12900..13000..13100..13200..13300..13400..13500..13600..13700..13800..13900..14000..14100..14200..14300..14400..14500..14600..14700..14800..14900..15000..15100..15200..15300..15400..15500..15600..15700..15800..15900..16000..16100..16200..16300..16400..16500..16600..16700..16800..16900..17000..17100..17200..17300..17400..17500..17600..17700..17800..17900..18000..18100..18200..18300..18400..18500..18600..18700..18800..18900..19000..19100..19200..19300..19400..19500..19600..19700..19800..19900..20000..20100..20200..20300..20400..20500..20600..20700..20800..20900..21000..21100..21200..21300..21400..21500..21600..21700..21800..21900..22000..22100..22200..22300..22400..22500..22600..22700..22800..22900..23000..23100..23200..23300..23400..23500..23600..23700..23800..23900..24000..24100..24200..24300..24400..24500..24600..24700..24800..24900..25000..25100..25200..25300..25400..25500..25600..25700..25800..25900..26000..26100..26200..26300..26400..26500..26600..26700..26800..26900..27000..27100..27200..27300..27400..27500..27600..27700..27800..27900..28000..28100..28200..28300..28400..28500..28600..28700..28800..28900..29000..29100..29200..29300..29400..29500..29600..29700..29800..29900..30000..30100..30200..30300..30400..30500..30600.."
     ]
    }
   ],
   "source": [
    "user_fav = {}\n",
    "\n",
    "for index,(user_id,groupby_userid) in enumerate(df_rating_new.groupby('user_id')):\n",
    "    movies_rating = groupby_userid.groupby('movie_index')['rating'].mean().sort_values(ascending=False)\n",
    "    movie_indexs = set(movies_rating[\n",
    "        movies_rating >= 3\n",
    "    ].index.tolist())\n",
    "    user_fav[user_id] = movie_indexs\n",
    "    if index % 100 == 0:print(index,end='..')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 计算准确率和召回率"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "准确率 0.0721781590351196\n",
      "召回率 0.16878049831774863\n"
     ]
    }
   ],
   "source": [
    "union_quantity = 0  # 为用户推荐的，用户也喜欢的个数   \n",
    "recommend_quantity = 0 # 为用户推荐的商品总数\n",
    "fav_quantity = 0 # 用户喜欢的商品总数\n",
    "\n",
    "\n",
    "for user_id in user_recommend.keys():\n",
    "    if user_id in user_fav.keys():\n",
    "        union_quantity += len(\n",
    "            user_recommend[user_id] & user_fav[user_id]\n",
    "        )\n",
    "        recommend_quantity += len(user_recommend[user_id])\n",
    "        fav_quantity += len(user_fav[user_id])\n",
    "\n",
    "print('准确率',union_quantity / recommend_quantity)\n",
    "print('召回率',union_quantity / fav_quantity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
